## 加载包, 准备数据
library(tidyverse)
library(dplyr)
library(ggplot2)
library(scales)
library(ggsci)
library(cowplot)
library(RColorBrewer)
library(paletteer)
library(ggrepel)
library(ggridges)
# p值计算相关包
library(ggsignif) # 推荐 (ggplot 语法)
library(ggstatsplot) # 快捷探索性 (自定义度低)
data(mtcars)
data("diamonds") # 导入 ggplots 的内置数据
small_diamonds <- sample_n(diamonds, size = 500) # dplyr 中随机抽取 500 个值
## 画布, xy映射
ggplot(data = small_diamonds, aes(x = carat, y = price)) + # 创建画布
geom_point(aes(color = cut)) + # 创建点图, 颜色映射到 cut 列
scale_color_npg() + # 更改映射颜色 (CNS 标度)
theme_classic() # 更换 x, y轴主题
## Mark 👍
## 更改点的形状大小 (推荐 21)
## 添加黑框
## 图列
ggplot(data = small_diamonds, aes(x = carat, y = price)) +
geom_point(shape = 21, # 改变点的形状 (推荐为21 - 带黑框镂空)
size = 4, # 大小 (内置的形状序号)
stroke = 0.5, # 边框的粗细
aes(fill = cut)) + # 添加黑色边框, 填充色映射给 cut
scale_fill_npg() + # 标度更换为填充色
theme_classic()
## 点的形状还不满足可以下载扩展的包
## ggstar
## ggimage (图片的形式)
## Mark 👍
## 映射点的大小
## 泡泡图 (点较少比较好看)
## 透明度 alpha
## 去除色条 (连续/离散)
data(mtcars) # 加载数据
ggplot(mtcars, aes(x = wt, y = mpg)) +
geom_point(shape = 21,
alpha = 0.5, # 设置透明度
aes(size = disp, fill = factor(cyl))) + # 点大小, 填充色的映射
# 数字变量默认为连续性, 出现色条, 需要 factor 将连续型变量转换为离散
scale_fill_npg() +
scale_size(range = c(1, 20)) + # 保留格子比较好看# 添加点大小的标度, range (设置点大小的范围)
theme_bw()
## 添加 title, x,y轴的标题, 图列标题
ggplot(data = small_diamonds, aes(x = carat, y = price)) +
geom_point(shape = 21, size = 4,
color = 'black', aes(fill = cut)) +
scale_fill_npg() +
labs(title = 'point plot', # 设置 title
x = 'weight of the diamond ', # 设置 x轴的标题
y = 'price in US dollars', # 设置 y轴的标题
fill = 'quality of the cut') + # 设置映射图列的标题
theme_classic()
## 设置x, y轴的刻度
ggplot(data = small_diamonds, aes(x = carat, y = price)) +
geom_point(shape = 21, size = 4,
color = 'black', aes(fill = cut)) +
scale_fill_npg() +
labs(title = 'point plot', # 设置 title
x = 'weight of the diamond ', # 设置 x轴的标题
y = 'price in US dollars', # 设置 y轴的标题
fill = 'quality of the cut') + # 设置映射图列的标题
scale_x_continuous(breaks = seq(0,3,0.5)) + # x轴的刻度 (0-3, 以0.5为刻度)
scale_y_continuous(breaks = seq(0, 15000, 5000), # y轴刻度 (0-15000, 5000)
labels = c('0', '5K', '10K', '15K')) + # 进一步设置 y轴刻度的标签
theme_classic()
## 主题布局
# devtools::install_github("calligross/ggthemeassist") # ggplot2 图形化美化插件
## 选中已有图代码 - 上方工具栏 - Addins
p_dv <-
ggplot(data = small_diamonds, aes(x = carat, y = price)) +
geom_point(shape = 21, size = 4,
color = 'black', aes(fill = cut)) +
scale_fill_npg() +
labs(title = 'point plot',
x = 'weight of the diamond ',
y = 'price in US dollars',
fill = 'quality of the cut') +
scale_x_continuous(breaks = seq(0,3,0.5)) +
scale_y_continuous(breaks = seq(0, 15000, 5000),
labels = c('0', '5K', '10K', '15K')) +
# ggplot Theme Assistant 生成
theme_classic() + theme(axis.title = element_text(size = 15,
face = "bold"), plot.title = element_text(size = 20,
face = "bold.italic", hjust = 0.5, vjust = 0.75),
legend.text = element_text(size = 15),
legend.title = element_text(size = 15),
strip.text = element_text(size = 10),
legend.position = c(0.1, 0.65)) + theme(legend.position = c(0.12, 0.65)) + theme(legend.text = element_text(size = 13),
legend.title = element_text(size = 13),
legend.position = c(0.14, 0.75)) + theme(axis.title = element_text(face = "bold.italic"))
p_dv
-————————————————————————————————————————————————————————————————
library(cowplot)
## Mark 👍
## 图形的拼接
### ggplot2 自带主题: theme_(bw), theme_few(), theme_cowplot(), theme_minimal()
p_dv1 <- p_dv + theme_bw()
p_dv1
# p_dv2 <- p_dv + theme_classic()
# p_dv3 <- p_dv + theme_dark()
# p_dv4 <- p_dv + theme_minimal_grid()
### cowp_dvlot 主题: theme_half_op_dven(), theme_minimal_grid(),
### theme_minimal_hgrid(), theme_minimal_vgrid()
# p_dv5 <- p_dv + theme_half_op_dven()
# p_dv6 <- p_dv + theme_minimal_grid()
# p_dv7 <- p_dv + theme_minimal_hgrid()
# p_dv8 <- p_dv + theme_minimal_vgrid()
# LETTERS() # 随机生成字母
# plot_grid(p_dv1, p_dv2, p_dv3, p_dv4, labels = LETTERS
# , ncol = 3 # 改变行列的布局
# ) # 将 p_dv1, p_dv2, p_dv3, p_dv4 拼接在一起, 并标注字母序号
# plot_grid(p_dv6, p_dv7, p_dv8, p_dv9, labels = LETTERS)
# plot_grid(p_dv1, p_dv2, p_dv3, p_dv4, labels = LETTERS[1:4])
# 教材p20
-————————————————————————————————————————————————————————————————
my_cols <- c('#00ADC8', '#D14749', '#FF912F', '#00B440', '#266199')
library(scales)
show_col(my_cols)
### 查看特定调色板的 十六进制颜色
# brewer_pal(palette = 'Set3')(5)
show_col(brewer_pal(palette = 'Set3')(5))
p_dv + scale_fill_manual(values = my_cols) # 自定义十六进制颜色
library(RColorBrewer)
par(mar=c(3,4,2,2))
display.brewer.all()
p_dv + scale_fill_brewer(palette = 'Set3') # R 自动整合了 RColorBrewer, 直接调用
#### (https://github.com/nanxstats/ggsci)
library(ggsci)
p_dv + scale_fill_npg() # 最常用
#### 将包括 ggsci在内的多个调色板整合
#### (https://github.com/EmilHvitfeldt/r-color-palettes)
library(paletteer)
p_dv + scale_fill_paletteer_d('pals::kelly') # d - 离散型, c - 连续型 (pals 下的 kelley)
p_cv <- ggplot(data = small_diamonds, aes(x = carat, y = price)) +
geom_point(shape = 21, size = 4,
color = 'black', aes(fill = depth)) +
theme_classic()
p_cv + scale_fill_gradient(low = '#FCDAC9', high = '#7C0D0D')
p_cv + scale_fill_gradient2(low = '#2D4971',
high = '#9F192B',
mid = 'white', # 分别设置 低, 中, 高
midpoint = 60) # 中间色的位置
p_cv + scale_fill_gsea()
p_cv + scale_fill_paletteer_c('pals::kovesi.diverging_bwr_40_95_c42') # 自动计算了中间值
-————————————————————————————————————————————————————————————————
## PCAtools 默认生成的图
library(PCAtools)
gene_exp_PCA <- read.table(file = '/Volumes/Cullwen_SSD/Genek/5 R语言出版级绘图(2020)/plot_training_20200701/data/rnaseq-apple/gene_exp.txt',
sep = '\t', header = T, row.names = 1)
sample_info_PCA <- read.table(file = '/Volumes/Cullwen_SSD/Genek/5 R语言出版级绘图(2020)/plot_training_20200701/data/rnaseq-apple/sample_info.txt',
sep = '\t', header = T, row.names = 1)
pca <- pca(gene_exp_PCA, metadata = sample_info_PCA) # PCA 分析结果
biplot(pca, x = 'PC1', y = 'PC2') # PCA 自带的可视化
####
library(tidyverse)
pca_res <- rownames_to_column(pca$rotated, var = 'sample_name') %>%
left_join(rownames_to_column(sample_info_PCA, var = 'sample_name'),
by = 'sample_name')
library(cowplot)
ggplot(pca_res, aes(x = PC1, y = PC2)) +
geom_point(size = 8, aes(shape = strain, fill = stage)) +
scale_shape_manual(values = c(21, 24)) +
stat_ellipse(aes(color = stage)) + # 添加环状元素置信区间
theme_half_open() +
# ggplot Theme Assistant
theme(legend.direction = "horizontal",
legend.position = c(0.2, 0.9), axis.title = element_text(size = 14),
plot.title = element_text(size = 15)) +labs(x = "PC1 (68% variance explained)", # 解析度来源 pca$variance
y = "PC2 (11% variance explained)") +
# 解决图列没有映射颜色的问题
guides(fill = guide_legend(override.aes=list(shape=21)))
-————————————————————————————————————————————————————————————————
library(readxl)
library(tidyverse)
#### Execl的导入 (右上窗口 - Import Dataset, 注意数值类型)
#### 生成代码
de_result_volcano <- read_excel("/Volumes/Cullwen_SSD/Genek/5 R语言出版级绘图(2020)/plot_training_20200701/data/vocano/de_result.xlsx",
col_types = c("numeric", "text", "text",
"text", "text", "numeric", "numeric",
"numeric", "numeric", "text", "text",
"text", "text", "text", "text", "text",
"text", "text"), na = c("NA"))
#### 手动输入选取需要在火山图上展示的基因
selected_genes <- c('FMP27', 'ERG251', 'C5_04050W',
'C7_02530C', 'NOT5', 'C6_03800C',
'PMC1', 'FEN1', 'ERG3', 'FEN12',
'ERG25', 'ERG6', 'MVB12', 'FGR32',
'ERG28', 'ERG27')
res <-
select(de_result_volcano, Rank, GENE_NAME, log2FoldChange, pvalue) %>%
mutate(direction = if_else(pvalue > 0.05 | abs(log2FoldChange) < 1, # 增加 direction 列, if_else 判断
'NS', # 正确返回
if_else(log2FoldChange >= 1, # 否则返回进一步判断
'UP',
'DOWN'))) %>%
mutate(selected = if_else(GENE_NAME %in% selected_genes,
'Show', '-'))
#### 根据 selected_genes 中字符从 res 中 GENE_NAME 提取行
# res_selected <- filter(res, GENE_NAME %in% selected_genes)
library(ggrepel)
ggplot(res, aes(x = log2FoldChange,
y = -log10(pvalue))) + # pvalue以log10转换并取相反数
geom_point(aes(color = direction),
size = 3,
show.legend = F) + # 去掉图列 (属于点图的图层)
geom_point(data = filter(res, selected == 'Show'), # 重新指定 data, 筛选后的向量
# data = res_selected,
size = 3,
shape = 21, # 带黑框的形状
stroke = 1) + # 边框的粗细
geom_text_repel(data = filter(res, selected == 'Show'),
# data = res_selected,
size = 3,
box.padding = 0.5, # 标签距离
aes(label = GENE_NAME)) +
geom_hline(yintercept = -log10(0.05), # 添加另一个集合元素 (直线), 坐标为 -log10(0.05)
linetype = 'dotdash', # 直线的样式 (虚线)
# solid, dashed, dotted, dotdash, longdash, twodash
color = 'grey50') +
geom_vline(xintercept = c(-1,1),
linetype = 'dotdash',
color = 'grey50') +
scale_colour_manual(values = c('#1500FF', '#A9A9A9', '#FF0102')) +
ylim(0, 50) + # 设置y轴的范围
labs(x = 'Log2(fold change)',
y = '-log10(p-value)') +
theme_half_open()
-————————————————————————————————————————————————————————————————
p_scatter_plot <-
ggplot(data = small_diamonds, aes(x = carat, y = price)) +
geom_point(shape = 21,
size = 4,
stroke = 0.5,
aes(fill = cut)) +
scale_fill_npg() +
theme_classic()
p_scatter_plot + geom_smooth(method = 'lm')
# 选择拟合模式 (默认为局部拟合 loess, 这里的 lm 是线性拟合)
# 拟合出来的曲线位于对角线 (表示相关)
cor.test(small_diamonds$carat, small_diamonds$price, method = 'pearson') # 计算相关系数和 p-value
##
## Pearson's product-moment correlation
##
## data: small_diamonds$carat and small_diamonds$price
## t = 47.95, df = 498, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.8896639 0.9210861
## sample estimates:
## cor
## 0.9066241
p_scatter_plot + geom_smooth(method = 'lm') +
# ggplot Theme Assistant
theme(legend.position = c(0.1, 0.82)) + # 图例位置
# 添加文本备注信息 (给予坐标位置)
annotate('text', x= 2.2, y = 10, label = 'r = 0.917; pvalue = 2.2e-16')
library(ggExtra)
### 类似 ggplot Theme Assistant 用法
### 自动保存到向量 p1 中
p1 <- p_scatter_plot + geom_smooth(method = 'lm') +
# ggplot Theme Assistant
theme(legend.position = c(0.1, 0.82)) + # 图例位置
# 添加文本备注信息 (给予坐标位置)
annotate('text', x= 2.2, y = 10, label = 'r = 0.917; pvalue = 2.2e-16')
### 生成代码
ggExtra::ggMarginal(
p = p1,
type = 'histogram',
margins = 'both',
size = 5,
colour = 'black',
fill = 'gray82'
)
# 扩展了解 ggstatsplot (https://github.com/IndrajeetPatil/ggstatsplot)
p1 + geom_rug(aes(color = cut), # 添加地毯线
length = unit(3, 'mm')) + # 设置绝对长度
scale_color_npg() # 更换地毯线的标度颜色, 与散点的颜色保持一致
-————————————————————————————————————————————————————————————————
### 拟合曲线需要在多个点之间计算曲线 (群体几何对象)
### 散点只需要 x,y 轴值 (个体几何对象)
### 根据所有的点来拟合曲线?根据分组信息拟合曲线?(因此需要指定 group)
### group = 1 (不分组, 按所有点进行拟合), group = cut (按照 cut 组进行拟合)
ggplot(data = small_diamonds, aes(x = carat, y = price)) +
geom_point(shape = 21,
size = 4,
stroke = 0.5,
aes(fill = cut)) + # fill 的映射仅在 point 图层生效
geom_smooth() +
scale_fill_npg() + # 不指定 group (默认不分组,以所有点进行拟合)
theme_classic()
### 继承的理解
ggplot(data = small_diamonds, aes(x = carat, y = price, fill = cut)) +
geom_point(shape = 21,
size = 4,
stroke = 0.5) + # 虽然没有指定映射, 但继承背景图层关系
geom_smooth() +
# 同理继承 并 按照已有的离散型映射分组 (这里背景中只有 cut 为离散型, carat 和 price 都为连续)
scale_fill_npg() +
theme_classic()
### 多态的理解
ggplot(data = small_diamonds, aes(x = carat, y = price, fill = cut)) +
geom_point(shape = 21,
size = 4,
stroke = 0.5) +
geom_smooth(aes(color = cut)) + # 在继承的基础上自定义 (把曲线的颜色映射给cut)
scale_fill_npg() +
theme_classic()
# exp:
HIV <-
tibble(Triplet = str_c('Triplet', 1:7, sep = ' '),
From = c(rep('Zambia', 4), rep('South Africa', 3)),
`Group A` = c('28/1687 (1.64)',
'33/2086 (1.57)',
'23/1695 (1.36)',
'41/2013 (2.04)',
'36/1507 (2.35)',
'26/1808 (1.43)',
'13/2195 (0.57)'),
`Group B` = c('19/1979 (0.94)',
'29/2408 (1.20)',
'22/1687 (1.30)',
'19/1698 (1.13)',
'33/1811 (1.80)',
'26/2078 (1.24)',
'10/2488 (0.40)'),
`Group C` = c('24/2054 (1.17)',
'33/2262 (1.48)',
'29/1811 (1.63)',
'37/1561 (2.39)',
'28/1304 (2.15)',
'32/1375 (2.31)',
'14/2195 (0.59)'))
HIV_tidy <-
gather(HIV, key = Group, value = Value, 3:5) %>%
# 对 HIV 的 3到5 进行转置处理
# 添加 Group, Value 表头
separate(col = Value, # 对 Value 列进行处理
sep = ' ', # 设置分隔符类型, 这里以空格分割
into = c('Number', 'Ratio')) %>% # 分割成两列的表头
separate(col = Number,
sep = '/',
into = c('Num', 'Total')) %>% # 按/分割
mutate(Ratio = as.numeric( # 设置数值类型
str_remove_all( # 去掉字符
Ratio, '\\(|\\)')), # 对 Ratio 列进行处理, ()特殊, 需要\\转译
Num = as.numeric(Num),
Total = as.numeric(Total))
# 标准
filter(HIV_tidy, Group %in% c('Group A', 'Group C')) %>%
ggplot(aes(x = Group, y = Ratio)) +
geom_line(aes(
color = Triplet, # 映射写在各几何图层
group = Triplet)) +
geom_point(aes(
color = Triplet, # 映射写在各几何图层
size = Num)) +
scale_color_jco() +
scale_size(range = c(1, 4)) +
theme_classic()
# 背景图层映射的继承
filter(HIV_tidy, Group %in% c('Group A', 'Group C')) %>%
ggplot(aes(x = Group, y = Ratio,
color = Triplet # 映射写在背景图层继承
)) +
geom_line(aes(group = Triplet)) +
geom_point(aes(size = Num)) +
scale_color_jco() +
scale_size(range = c(1, 4)) +
theme_classic()
-————————————————————————————————————————————————————————————————
library(gapminder)
library(tidyverse)
library(cowplot)
data("gapminder")
filter(gapminder, country %in% c('China', 'India', 'Japan')) %>% # 对 gapminder 数据进行过滤并管道给 ggplot 作图
ggplot(aes(x = year, y = lifeExp, color = country)) + # 按 country 进行分组并映射给颜色
geom_line() + # 群体几何对象, 继承上一层的分组信息
geom_point(shape = 21, size = 2, fill = 'white') + # 空心圆, 图层的顺序
scale_color_aaas() +
theme_minimal_hgrid() +
# ggplot Theme Assistant
theme(axis.text.x = element_text(angle = 45, vjust = 0.5)) # x 轴标签旋转 45度, 并下移
### 写入数据
HIV <-
tibble(Triplet = str_c('Triplet', 1:7, sep = ' '),
From = c(rep('Zambia', 4), rep('South Africa', 3)),
`Group A` = c('28/1687 (1.64)',
'33/2086 (1.57)',
'23/1695 (1.36)',
'41/2013 (2.04)',
'36/1507 (2.35)',
'26/1808 (1.43)',
'13/2195 (0.57)'),
`Group B` = c('19/1979 (0.94)',
'29/2408 (1.20)',
'22/1687 (1.30)',
'19/1698 (1.13)',
'33/1811 (1.80)',
'26/2078 (1.24)',
'10/2488 (0.40)'),
`Group C` = c('24/2054 (1.17)',
'33/2262 (1.48)',
'29/1811 (1.63)',
'37/1561 (2.39)',
'28/1304 (2.15)',
'32/1375 (2.31)',
'14/2195 (0.59)'))
### 宽数据 (不利于数据分析)
姓名 语文 数学 英语
张三 70 80 90
李四 60 70 80
### 长数据
姓名 科目 成绩
张三 语文 70
张三 数学 80
张三 英语 90
李四 语文 60
李四 数学 70
李四 英语 80
library(tidyverse)
HIV_tidy <-
gather(HIV, key = Group, value = Value, 3:5) %>%
# 对 HIV 的 3到5 进行转置处理
# 添加 Group, Value 表头
separate(col = Value, # 对 Value 列进行处理
sep = ' ', # 设置分隔符类型, 这里以空格分割
into = c('Number', 'Ratio')) %>% # 分割成两列的表头
separate(col = Number,
sep = '/',
into = c('Num', 'Total')) %>% # 按/分割
mutate(Ratio = as.numeric( # 设置数值类型
str_remove_all( # 去掉字符
Ratio, '\\(|\\)')), # 对 Ratio 列进行处理, ()特殊, 需要\\转译
Num = as.numeric(Num),
Total = as.numeric(Total))
# (强大的添加标签 R 包: https://ggrepel.slowkow.com/articles/examples.html)
p1 <-
filter(HIV_tidy, Group %in% c('Group A', 'Group C')) %>% # 对数据进行过滤, Group 中提取 A, C
ggplot(aes(x = Group, y = Ratio, color = Triplet)) + # Triplet 映射 给 color 写在背景图层继承
geom_line(aes(group = Triplet)) + # 按 Triplet 分组配对折线
geom_point(aes(size = Num)) + # 仅对点图层设置点的大小映射
geom_text_repel( # 强大的添加标签 R 包
data = filter(HIV_tidy, Group == 'Group A'), # 重定义分组信息, 取消背景图层的继承
nudge_x = -0.1, # 标签在 x 轴上位移
min.segment.length = Inf, # 去掉标签引线
aes(label = str_remove(Triplet, 'Triplet'))) + # tidyverse 中去掉分割字符串
geom_text_repel(
data = filter(HIV_tidy, Group == 'Group C'),
nudge_x = 0.1,
min.segment.length = Inf,
aes(label = str_remove(Triplet, 'Triplet'))) +
scale_color_jco() +
scale_size(range = c(1, 4), # 设置点大小刻度的范围
# 统一图列 (设置点大小图列范围)
breaks = seq(10, 50, 10), # 以 10 - 40 以 10 位间隔分割
labels = seq(10, 50, 10), # 对应的图例标签
limits = c(10, 50) # 确保 10 - 40 范围
) +
scale_y_continuous(expand = c(0, 0), limits = c(0, 2.5)) + # 设置 y 轴从 0 开始, 范围 0 - 2.5
labs(x = '', # 去掉 x 轴标题 (设置为空)
y = 'HIV infections\n(per 100 persons)') + # 设置 y 轴标题
theme_half_open()
p1
p2 <-
filter(HIV_tidy, Group %in% c('Group A', 'Group B')) %>%
ggplot(aes(x = Group, y = Ratio, color = Triplet)) +
geom_line(aes(group = Triplet)) +
geom_point(aes(size = Num)) +
geom_text_repel(
data = filter(HIV_tidy, Group == 'Group A'),
nudge_x = -0.1,
min.segment.length = Inf,
aes(label = str_remove(Triplet, 'Triplet'))) +
geom_text_repel(
data = filter(HIV_tidy, Group == 'Group B'),
nudge_x = 0.1,
min.segment.length = Inf,
aes(label = str_remove(Triplet, 'Triplet'))) +
scale_color_jco() +
scale_size(range = c(1, 4),
breaks = seq(10, 50, 10),
labels = seq(10, 50, 10),
limits = c(10, 50)) +
scale_y_continuous(expand = c(0, 0), limits = c(0, 2.5)) +
labs(x = '',
y = 'HIV infections\n(per 100 persons)') +
theme_half_open()
p2
library(patchwork)
# (强大的拼图 R 包)
p1 + p2 +
plot_layout(guides = "collect") & # 收集图列, 整合图列
theme(legend.position = 'top') # 放置顶部
### 其他用法:
### (https://patchwork.data-imaginist.com/articles/patchwork.html)
# p1 + p2 # 左右排列
# p1 / p2 # 上下排列
# p1 | (p2 / p3) # 1 2/3 排列
# p1 + p2 + p3 +
# plot_annotation(tag_levels = 'I') # 生成序号
# p1 + p2 + p3 + p4
# p1 + p2 + p3 + p4 + plot_layout(nrow = 3, byrow = FALSE) # 设定行列排布
# p1 +
# p2 + labs(subtitle = 'This will appear in the last plot') # 添加副标题
# (p1 | (p2 / p3)) +
# plot_annotation(title = 'The surprising story about mtcars') # 设置主标题
# etc.
-————————————————————————————————————————————————————————————————
## 读取数据
data('mtcars') # 有行名的 dataframe (不利于作图)
mtcars_table <- rownames_to_column(mtcars, var = 'car') %>% # 把行名作为变量
mutate(cyl = factor(cyl)) # cyl 列转换为因子, 避免默认离散型变量的图列色条
mtcars_table$cyl # Levels - 因子
## [1] 6 6 4 6 8 6 8 4 4 6 6 8 8 8 8 8 8 4 4 4 4 8 8 8 8 4 4 4 8 6 8 4
## Levels: 4 6 8
ggplot(data = mtcars_table, aes(x = car, y = mpg)) +
geom_col(aes(fill = cyl), # aes(fill = factor(cyl))
width = 0.7) + # 条形图的宽度 (0.7最好)
scale_fill_npg() +
theme_minimal() +
theme(axis.text.x = element_text( # 对 x 轴标签字体属性进行修改
angle = 90, # 旋转 90 度
hjust = 1, # 标签左右位移, 0 - 左对齐, 1 - 右对齐
vjust = 0.3), # 标签高度位移, 正往上位移, 负往下位移
legend.position = 'top') # 图例放顶部
### 对数据进行排序
#### Rstudio 对离散型变量是默认是没有顺序, 按首字母进行排序
#### 需要对离散型变量进行排序, 则需要将其转换为 factor 并自定义设置 levels
mtcars_table <- rownames_to_column(mtcars, var = 'car') %>%
mutate(cyl = factor(cyl)) %>%
arrange(cyl, desc(mpg)) %>% # 默认升序排序, 先对 cyl 进行排序 = 以 cyl 来分组, 再对 mpg 从高到低排序
mutate(mtcars_table, car = factor(car, levels = car))
### 描述数据高出平均值多少个标准差
### 处理数据
mtcars_z_score <- mutate(mtcars_table, mpg_z = (mpg - mean(mpg)) / sd(mpg)) %>%
# 计算与平均数相差的标准差倍数
mutate(direction = if_else(mpg_z >= 0, 'higt', 'low')) %>% # 条件判断
arrange(desc(direction), mpg_z) %>%
mutate(car = factor(car, levels = car))
#### 作图
ggplot(mtcars_z_score, aes(x = car, y = mpg_z)) +
geom_col(aes(fill = direction), width = 0.7) +
theme_classic() +
scale_fill_npg() +
theme_minimal() + # z-core 图适合 minimal 主题
theme(axis.text.x = element_text(
angle = 90,
hjust = 1,
vjust = 0.3),
legend.position = 'top') +
theme(legend.position = "top", legend.direction = 'horizontal') +labs(fill = 'cyl') +
labs(x = NULL) +
coord_flip() # 翻转坐标系
# 点与线段组合
ggplot(data = mtcars_table, aes(x = car, y = mpg)) +
geom_point(aes(color = cyl,
), size = 3) +
geom_segment(aes(x = car, # 线段的 x 轴的起始位置
xend = car, # 线段的 x 轴的终止位置
y = 1, # 线段的 y 轴的起始位置
yend = mpg, # 线段的 y 轴的终止位置
color = cyl,
width = 3)) +
scale_color_npg() +
theme_classic() +
theme(axis.text.x = element_text(angle = 90,
hjust = 1,
vjust = 0.3),
legend.position = 'top') +
theme(legend.direction = "horizontal") +
labs(x = NULL, size = NULL)
扩展补充 (截断坐标轴) https://www.jianshu.com/p/de283990ecd1 https://stackoverflow.com/questions/7194688/using-ggplot2-can-i-insert-a-break-in-the-axis
#### 适用的数据类型
# 大分类 包含 各个小分类的数据
# 各个 小分类 占总分类的对比
# 一般堆叠条形图和百分比堆叠条形图
#### 过滤数据
small_diamonds <- filter(diamonds, cut %in% c('Ideal', 'Premium', 'Good'))
#### 分组统计
small_diamonds_sum <-
group_by(small_diamonds, color, cut) %>% # 注意分组的顺序, 主分类放前面
summarise(count = n())
#### 作图
ggplot(data = small_diamonds_sum, aes(x = color, y = count)) +
geom_col(aes(fill = cut), width = 0.7) +
scale_fill_lancet() +
scale_y_continuous(expand = c(0, 0)) + # y 轴坐标从 0 开始
theme_classic()
#### 添加标签
# ggplot(data = small_diamonds_sum, aes(x = color, y = count)) +
# geom_col(aes(fill = cut), width = 0.7) +
# scale_fill_lancet() +
# scale_y_continuous(expand = c(0, 0)) + # y 轴坐标从 0 开始
# # ? geom_text(aes(x = color, y = count, label = count)) +
# # (错误) y 轴位置需要计算 (与前一组分进行累加 再减去自身分组 count 的一半) 且 需要倒序后加标签
# theme_classic()
# 处理数据
small_diamonds_sum <-
group_by(small_diamonds, color, cut) %>% # 注意分组的顺序, 主分类放前面
summarise(count = n()) %>%
arrange(color, desc(cut)) %>%
mutate(cum_count = cumsum(count)) # 因为进行了分组, 所以只在组内累加
# 作图
ggplot(data = small_diamonds_sum, aes(x = color, y = count)) +
geom_col(aes(fill = cut), width = 0.7) +
scale_fill_lancet() +
scale_y_continuous(expand = c(0, 0)) +
geom_text(aes(x = color,
y = cum_count - 0.5*count,
label = count),
color = 'white',
size = 3.5) +
theme_classic()
small_diamonds_sum <-
group_by(small_diamonds, color, cut) %>% # 注意分组的顺序, 主分类放前面
summarise(count = n()) %>%
arrange(color, desc(cut)) %>%
mutate(cum_count = cumsum(count)) %>% # 因为进行了分组, 所以只在组内累加
mutate(prop = count / sum(count)) %>% # 因为进行了分组, 所以 sum 为组内总数
mutate(cum_prop = cumsum(prop))
ggplot(data = small_diamonds_sum, aes(x = color, y = prop)) +
geom_col(aes(fill = cut), width = 0.7) +
scale_fill_lancet() +
scale_y_continuous(expand = c(0, 0)) +
geom_text(aes(x = color,
y = cum_prop - 0.5*prop,
label = scales::percent(prop, accuracy = 1)),
# scales 包中将数值转换为百分比并保留小数
color = 'white',
size = 3.5) +
theme_classic()
### 并排排列
ggplot(data = small_diamonds_sum, aes(x = color, y = count)) +
geom_col(aes(fill = cut), width = 0.7,
position = 'dodge') + # dodge - 躲避
scale_fill_lancet() +
scale_y_continuous(expand = c(0, 0)) +
theme_classic()
-————————————————————————————————————————————————————————————————
## 读取数据
browsers <- read.csv(file =
'/Volumes/Cullwen_SSD/Genek/5 R语言出版级绘图(2020)/plot_training_20200701/data/ggplot/browers.csv'
# 绝对路径读取
) %>%
arrange(desc(version)) %>%
mutate(cumsum_share = cumsum(share))
## 准备数据
browsers_sum <- group_by(browsers, browser) %>%
summarise(browser_share = sum(share)) %>%
arrange(desc(browser)) %>%
mutate(cumsum_browser_share = cumsum(browser_share))
library(ggrepel)
ggplot(data = browsers_sum,
aes(x = 'Cullwen', y = browser_share)) +
# 单个柱子不需要设置 x 的映射
geom_col(color = 'black', aes(fill = browser)) +
geom_text(data = filter(
browsers_sum, browser_share >= 5),
size = 3,
aes(y = cumsum_browser_share - 0.5*browser_share, # 标签的位置
label = str_c(browser, '\n', browser_share, '%'))) +
# 字符串拼接, '\n' 换行符
geom_text_repel(data = filter(
browsers_sum, browser_share < 5),
size = 3,
nudge_y = 7,
segment.color = 'black', # 引线的颜色
min.segment.length = 0, # 不管扰动距离多大都加引线
aes(y = cumsum_browser_share - 0.5*browser_share,
label = str_c(browser, '\n', browser_share, '%'))) +
scale_fill_lancet() +
coord_polar(theta = 'y') + # 换成极坐标系, 并根据 y 折叠
theme_nothing()
# theme_void() # 保留图列的主题
ggplot(data = browsers_sum,
aes(x = 2, #
y = browser_share)) +
geom_col(color = 'black', aes(fill = browser)) +
geom_text(data = filter(
browsers_sum, browser_share >= 5),
size = 3,
aes(y = cumsum_browser_share - 0.5*browser_share,
label = str_c(browser, '\n', browser_share, '%'))) +
geom_text_repel(data = filter(
browsers_sum, browser_share < 5),
size = 3,
min.segment.length = 0,
aes(x = 2.5, #
y = cumsum_browser_share - 0.5*browser_share,
label = str_c(browser, '\n', browser_share, '%'))) +
xlim(0.5, 2.7) + #
scale_fill_lancet() +
coord_polar(theta = 'y') +
theme_nothing()
# theme_void() # 保留图列的主题
# a柱状图 + b柱状图 + y轴扭转
# a 柱子
ggplot(data = browsers_sum,
aes(x = 2, # 设置柱子的位置, 方便标签的设置
y = browser_share)) +
geom_col(color = 'black', width = 0.95, # 两个柱子的间隙
aes(fill = browser)) +
# 正常标签
geom_text(data = filter(
browsers_sum, browser_share >= 5), # 分组设置标签
size = 3,
aes(x = 2, y = cumsum_browser_share - 0.5*browser_share,
label = str_c(browser, '\n', browser_share, '%'))) +
# 重叠标签
geom_text_repel(data = filter(
browsers_sum, browser_share < 5),
size = 3,
nudge_y = 7, # 扰动设置, 在非极坐标下较为容易观察设置
segment.color = 'black',
min.segment.length = 0,
aes(y = cumsum_browser_share - 0.5*browser_share,
label = str_c(browser, '\n', browser_share, '%'))) +
# b 柱子
geom_col(data = browsers,
aes(x = 3, y = share, fill = version),
color = 'black',
width = 0.95) +
# 正常标签
geom_text(data = filter(
browsers, share >= 4),
size = 3,
aes(x = 3, y = cumsum_share - 0.5*share,
label = str_c(version, '\n', share, '%'))) +
# 重叠标签
geom_text_repel(data = filter(
browsers, share < 4),
size = 3,
nudge_x = 0.1,
segment.color = 'black',
min.segment.length = 0,
aes(x = 3.5, y = cumsum_share - 0.5*share,
label = str_c(version, '\n', share, '%'))) +
scale_fill_igv() +
coord_polar(theta = 'y') +
theme_nothing()
-————————————————————————————————————————————————————————————————
data('diamonds')
ggplot(data = diamonds, aes(x = price)) +
geom_histogram(color = 'black',
bins = 18, # 窗口数 (默认为x轴)
# binwidth = 300 - 窗口大小
position = 'dodge', # stack / fill
aes(fill = cut)) +
scale_fill_brewer(palette = 'Set1') +
scale_y_continuous(expand = c(0, 0)) +
scale_x_continuous(expand = c(0, 0)) +
# ggplot Theme Assistant
theme_minimal_hgrid() + theme(legend.position = c(0.83, 0.83))
-————————————————————————————————————————————————————————————————
## Method01
ggplot(data = diamonds, aes(x = price)) +
geom_density(aes(fill = cut), alpha = 0.25) +
scale_fill_npg() +
# ggplot Theme Assistant
theme_minimal_hgrid() + theme(legend.position = c(0.83, 0.83))
## Method02
ggplot(data = diamonds, aes(x = price)) +
geom_density(aes(color = cut)) +
scale_color_brewer(palette = 'Set1') +
# ggplot Theme Assistant
theme_minimal_hgrid() + theme(legend.position = c(0.83, 0.83))
library(ggridges) # ggplot 无法画
### 山恋图 - 密度图各组分的向上平移
ggplot(data = diamonds, aes(x = price, y = cut)) +
geom_density_ridges(aes(fill = cut), alpha = 0.7) +
scale_fill_npg() +
theme_half_open() +
# ggplot Theme Assistant
theme(legend.position = "top", legend.direction = "horizontal") + # 图列顶部, 横向
theme(legend.position = c(0.38, 0.91)) + # 图列坐标
theme(legend.title = element_text(size = 11)) + # 图列标题大小
theme(legend.text = element_text(size = 8)) # 图列模块大小
-————————————————————————————————————————————————————————————————
## 点图与散点图的区别
## 点图: y 轴 - 连续型变量, x 轴 - 离散型变量
## 散点图: x,y 轴且为连续型变量
ggplot(data = iris, aes(x= Species, y = Sepal.Length)) +
geom_point(aes(colour = Species), position = 'jitter') +
# 点的值可能重叠, jitter 参数进行扰动
theme_cowplot()
## errbar
iris_sum <-
group_by(iris, Species) %>% # 对数据进行分组
# 计算平均数和标准差并进行汇总
summarise(Sepal.Length.mean = mean(Sepal.Length),
Sepal.Length.sd = sd(Sepal.Length))
ggplot(data = iris_sum, aes(x = Species, y = Sepal.Length.mean)) +
geom_point(aes(color = Species)) +
geom_errorbar(aes(ymax = Sepal.Length.mean + Sepal.Length.sd,
ymin = Sepal.Length.mean - Sepal.Length.sd,
color = Species),
width = 0.15) +
theme_cowplot()
#### 最上面的线 - 最大值
#### 中的线 - 中位数
#### 最下方 — 最小值
#### 25%, 25%, 25%, 25%
#### 范围以外 - 异常值
library(ggsignif)
library(ggstatsplot)
ggplot(data = iris, aes(x = Species, y = Sepal.Length)) +
geom_boxplot(aes(fill = Species), color = 'black') +
# 添加 p-value
# 0.05 - 显著, 0.01 - 很显著, 0.001 - 极其显著
geom_signif(comparisons = list(c('setosa', 'versicolor'),
c('setosa', 'virginica'),
c('versicolor', 'virginica')),
y_position = c(7.1, 8.0),
map_signif_level = T) + # 已星号来表示 p-value
geom_point(size = 3, shape = 21, fill = 'white', aes(color = Species),
position = position_jitter(width = 0.25),
# 进阶的 jitter, 限制扰动区域
alpha = 0.5) +
scale_fill_npg() +
theme_cowplot()
#### 比较组内差异
#### 分面
head(iris) # 宽数据 (列有Sepal.Length, Sepal.Width, Petal.Length, Petal.Width, Species - 宽)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
# 把宽数据转换为长数据便于作图
iris_tidy <- select(iris, Sepal = Sepal.Length, Petal = Petal.Length, Species) %>%
gather(key = Tissue, value = Length, 1:2
# Sepal, Petal - 直接写列名
)
#### 作图
ggplot(data = iris_tidy, aes(x = Tissue, y = Length)) +
geom_boxplot(aes(fill = Tissue)) +
facet_grid(~Species) + # 分面 (按某一列对数据进行分类) (行~列)
geom_signif(comparisons = list(c("Petal", "Sepal"))) +
theme_cowplot()
ggplot(data = iris, aes(x = Species, y = Sepal.Length)) +
geom_violin( #
aes(fill = Species), color = 'black', width = 0.4) +
geom_signif(comparisons = list(c('setosa', 'versicolor'),
c('setosa', 'virginica'),
c('versicolor', 'virginica')),
y_position = c(7.1, 8.0),
map_signif_level = T) +
geom_point(size = 1.5, shape = 21, fill = 'white', aes(color = Species),
position = position_jitter(width = 0.05),
alpha = 0.5) +
scale_fill_npg() +
theme_cowplot()
ggplot(data = iris, aes(x = Species, y = Sepal.Length)) +
# 箱线图 + 小提琴图
geom_violin(aes(fill = Species), color = 'black', width = 0.4) +
geom_boxplot(color = 'black', width = 0.05) +
geom_signif(comparisons = list(c('setosa', 'versicolor'),
c('setosa', 'virginica'),
c('versicolor', 'virginica')),
y_position = c(7.1, 8.0),
map_signif_level = T) +
geom_point(size = 1.5, shape = 21, fill = 'white', aes(color = Species),
position = position_jitter(width = 0.05),
alpha = 0.5) +
scale_fill_npg() +
theme_cowplot() +
theme(legend.position = c(0.58, 0.07), legend.direction = "horizontal") +
theme(legend.text = element_text(size = 8.5),
legend.title = element_text(size = 8.5))
library(ggbeeswarm)
ggplot(data = iris, aes(x = Species, y = Sepal.Length)) +
geom_quasirandom( #
shape = 21, color = 'black', aes(fill = Species), method = 'smiley', width = 0.4) +
# method: quasirandom, pseudorandom, smiley, fronwney
geom_signif(comparisons = list(c('setosa', 'versicolor'),
c('setosa', 'virginica'),
c('versicolor', 'virginica')),
y_position = c(7.1, 8.0),
map_signif_level = T) +
scale_fill_npg() +
theme_cowplot()
## 小提琴图的另一种形式
## 前言
library(gghalves)
ggplot(data = iris, aes(x = Species, y = Sepal.Length)) +
geom_half_violin(
# gghalves 包: 一半小提琴图
aes(fill = Species), color = 'black', width = 0.4) +
geom_boxplot(color = 'black', width = 0.05) +
geom_signif(comparisons = list(c('setosa', 'versicolor'),
c('setosa', 'virginica'),
c('versicolor', 'virginica')),
y_position = c(7.1, 8.0),
map_signif_level = T) +
geom_point(size = 1.5, shape = 21, fill = 'white', aes(color = Species),
position = position_jitter(width = 0.05),
alpha = 0.5) +
scale_fill_npg() +
theme_cowplot() +
theme(legend.position = c(0.58, 0.07), legend.direction = "horizontal") +
theme(legend.text = element_text(size = 8.5),
legend.title = element_text(size = 8.5))
ggplot(data = iris, aes(x = Species, y = Sepal.Length)) +
geom_half_violin(
aes(fill = Species), color = 'black', width = 1, alpha = 0.5,
position = position_nudge(x = 0.3, y = 0), # 小提琴图的扰动
side = 'r', # 小提琴的方向
adjust = 0.5) + # 平滑程度
geom_boxplot(color = 'black', width = 0.05, aes(fill = Species),
position = position_nudge(x = 0.3, y = 0)) + # 箱线图的扰动
geom_signif(comparisons = list(c('setosa', 'versicolor'),
c('setosa', 'virginica'),
c('versicolor', 'virginica')),
y_position = c(7.1, 8.0),
map_signif_level = T) +
geom_point(size = 0.5, #
position = position_jitter(width = 0.15)) + #
coord_flip() + # 旋转坐标系
scale_fill_npg() +
theme_cowplot() +
theme(legend.position = c(0.58, 0.07), legend.direction = "horizontal") +
theme(legend.text = element_text(size = 8.5),
legend.title = element_text(size = 8.5))
ggplot(data = iris, aes(x = Species, y = Sepal.Length)) +
geom_half_violin(fill = 'black', color = 'black', width = 1, alpha = 0.5,
position = position_nudge(x = 0.3, y = 0),
side = 'r', #
adjust = 0.5) +
geom_half_violin(
aes(x = Species, y = Petal.Length, #
), fill = 'white', color = 'black', width = 1, alpha = 0.5,
position = position_nudge(x = 0.3, y = 0),
side = 'l', #
adjust = 0.5) +
coord_flip() +
theme_cowplot()
-————————————————————————————————————————————————————————————————
### facet_grid (简单分面)
mtcars_table <- rownames_to_column(mtcars, var = 'car') %>%
mutate(cyl = factor(cyl),
vs = if_else(vs == 1, 'V', 'L'),
am = if_else(am == 1, 'A', 'M'))
ggplot(mtcars_table, aes(x = wt, y = mpg)) +
geom_point(shape = 21,
alpha = 0.5,
aes(size = disp, fill = factor(cyl))) +
scale_fill_npg() +
scale_size(range = c(1, 20)) +
facet_grid(factor(vs)~factor(am), # 两个变量, vs(行)~am(列)
# .~factor(am), 单变量 (am)
scales = 'free') + # free, free_x, free_y
theme_bw()
### facet_wrap (设置行列分面)
small_diamonds <- sample_n(diamonds, size = 500)
ggplot(data = small_diamonds, aes(x = carat, y = price)) +
geom_point(shape = 21, size = 2,
color = 'black', aes(fill = cut)) +
scale_fill_npg() +
facet_wrap(~color, nrow = 2) + # 单个变量 (color), 设置行列数
theme_bw() +
theme(legend.position = c(0.89, 0.245))
### acet_matrix (多变量分面)
library(ggforce)
ggplot(mtcars_table, aes(x = .panel_x, y = .panel_y)) + # 非固定x, y轴映射
geom_point(shape = 21,
aes(fill = factor(cyl))) +
scale_fill_npg() +
facet_matrix(vars(mpg, disp, wt, qsec)) + # 四个变量
# facet_matrix(rows = vars(disp, wt), cols = var(mpg, qsec)) - 指定行列变量
theme_bw()
### 根据 映射/类别 放大
library(ggforce)
ggplot(iris, aes(x = Petal.Length,
y = Petal.Width, color = Species)) +
geom_point(size = 3) +
scale_color_lancet() +
theme_test() +
facet_zoom(x = Species == 'versicolor', # x = (投影到 x 轴)
zoom.size = 1) # 与原图的比例
### 根据 坐标数值范围 放大
ggplot(iris, aes(x = Petal.Length,
y = Petal.Width, color = Species)) +
geom_point(size = 3) +
scale_color_lancet() +
theme_test() +
facet_zoom(xlim = c(3.5, 5.3), # 设置放大的区域 x 轴范围
ylim = c(1.3,2), # 设置放大的区域 y 轴范围
zoom.size = 1)
### 放大部分添加标签
### 散点在 主, 放大图层都显示
### 标签仅在放大图层显示
### 对数据进行 Filter, 实现就一部分数据加标签
### facet_zoom: zoom = NA (主图和放大部分都显示)
### zoom = TRUE (仅在放大部分显示) (标签图层的数据输入)
### 将放大部分的点另存为一个 table, zoom 赋值为 TRUE
iris_tbl_all <- mutate(iris, zoom = NA)
iris_tbl_zoom <- filter(iris,
Petal.Length >= 3.5 & Petal.Length <= 5.3 &
Petal.Width >= 1.3 & Petal.Width <= 2) %>%
mutate(zoom = TRUE)
ggplot(iris_tbl_all, aes(x = Petal.Length,
y = Petal.Width, color = Species)) +
geom_point(size = 3) +
geom_text_repel(data = iris_tbl_zoom, aes(label = Species), nudge_x = 0.06, nudge_y = 0.005,box.padding = 0.2, size = 3) +
scale_color_lancet() +
theme_test() +
facet_zoom(xlim = c(3.5, 5.3), # 设置放大的区域 x 轴范围
ylim = c(1.3,2), # 设置放大的区域 y 轴范围
zoom.size = 1,
zoom.data = zoom) # NA, T, F 的信息处在列
### 离散型比变量的局部放大
### 准备数据
data("diamonds")
set.seed(100)
small_diamonds_zoom <- sample_n(diamonds, size = 500)
### facet_zoom 不适用于离散型变量
ggplot(data = small_diamonds_zoom, aes(x = color,y = price)) +
geom_point(shape = 21, size = 4, aes(fill = cut)) +
scale_fill_npg() +
theme_classic() +
facet_zoom(xlim = c('F', 'G', 'H'))
## Error in `train_scales()`:
## ! facet_zoom doesn't support zooming in discrete scales
# Error: facet_zoom doesn't support zooming in discrete scales (不适用于离散型变量)
### 优化处理数据
small_diamonds_zoom <- sample_n(diamonds, size = 500) %>%
mutate(color_num = as.numeric(color)) # 把离散型变量数字化 (首字母的大小)
### 作图
ggplot(data = small_diamonds_zoom, aes(x = color_num,y = price)) +
geom_point(shape = 21, size = 4, aes(fill = cut)) +
scale_fill_npg() +
theme_bw() +
scale_x_continuous(name = "color", # 修改 x 轴的标题
breaks = 1:7, # 设置 x 轴刻度间隔
labels = c('D', 'E', 'F', 'G', 'H', 'I', 'J')) + # 设置刻度对应的标签
facet_zoom(xlim = c(2:4))
### 圈出特定的点
library(ggforce)
ggplot(iris, aes(x = Petal.Length, y = Petal.Width, color = Species)) +
geom_point(size = 3) +
geom_mark_hull( # hull - 多边形, ellipse - 椭圆标记
aes(label = Species) # 添加标签
) +
scale_color_lancet() +
theme_test() +
theme(legend.position = 'none')
### 图中表
#### 生成统计表
iris_stat <- group_by(iris, Species) %>%
summarise(Petal.Length = mean(Petal.Length),
Petal.Width = mean(Petal.Width))
view(iris_stat)
#### 生成位置表
tbl_pos <- tibble(x = 9, y = 0.1, tb = list(iris_stat)) # tb 里存着的就是子表
# 子表的 x, y坐标, tb里放着子表 (统计表 - list - 可以是多个)
#### 作图
library(ggpmisc)
ggplot(iris, aes(x = Petal.Length, y = Petal.Width, color = Species)) +
geom_point(size = 3) +
geom_table( #
data = tbl_pos, # 切换数据 (位置表)
aes(x = x, y = y, # 主图层的 x, y 轴
label = tb)) + # 统计表
scale_color_lancet() +
theme_test() +
theme(legend.position = c(0.1, 0.8))
### 图中图
#### 生成子图
p0 <- ggplot(iris, aes(Species, Sepal.Length)) +
geom_boxplot(aes(fill = Species), outlier.shape = 21) +
scale_fill_lancet() +
theme_classic() +
theme(legend.position = 'none',
axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
plot.background = element_blank())
#### 生成位置表
plot_pos <- tibble(x = 9, y = 0.1, plot = list(p0)) # plot 里存的就是子图
view(plot_pos)
#### 作图
library(ggpmisc)
ggplot(iris, aes(x = Petal.Length, y = Petal.Width, color = Species)) +
geom_point(size = 3) +
geom_plot( #
data = plot_pos,
aes(x = x, y = y,
label = plot)) +
scale_color_lancet() +
theme_test() +
theme(legend.position = c(0.1, 0.8))
-————————————————————————————————————————————————————————————————
library(patchwork) # 最常用的拼图包
### 准备图片
p1 <- ggplot(mtcars) +
geom_point(aes(mpg, disp, color = factor(cyl))) +
ggtitle('Plot 1')
p2 <- ggplot(mtcars) +
geom_boxplot(aes(gear, disp, group = gear, fill = factor(gear))) +
ggtitle('Plot 2')
p3 <- ggplot(mtcars) +
geom_point(aes(hp, wt, color = factor(cyl))) +
ggtitle('Plot 3')
p4 <- ggplot(mtcars) +
geom_bar(aes(gear)) +
facet_wrap(~cyl) +
ggtitle('Plot 4')
### 默认拼图
p1 + p2 + p3
p1 + p2 + p3 + p4 # 简单拼图
### '|' 左右拼图, '/' 上下拼图
(p1 | p2) / p3 / p4
### 自定义拼图
p1 / ((p2/p3) | p4)
### 调整比例
p1 / ((p2/p3) | p4) + plot_layout(heights = c(1,3), widths = c(1,6))
### 添加图序号
p1 / ((p2/p3) | p4) +
plot_annotation(tag_levels = 'A') # 'a', 'I', '1'
### 同时设置图的主题元素
p1 / ((p2/p3) | p4) +
plot_layout(heights = c(1,3), widths = c(1,6)) +
plot_annotation(tag_levels = 'A') & # '&' 同时设置
theme_bw() & # 主题风格
theme(axis.title = element_text(size = 20)) # 主题字体大小
### 图列收集
#### 默认 (自动识别相同的图列并收集)
p6 <- p1 / ((p2/p3) | p4) +
plot_layout(heights = c(1,3), widths = c(1,6)) +
plot_annotation(tag_levels = 'A') + # 'a', 'I', '1'
plot_layout(guides = 'collect') &
theme_bw() # 主题风格 (& 同时设置)
p6
#### 设置一个空区域放置图列
p5 = p3
p1 + p3 + p5 +
plot_layout(guides = 'collect') + # 收集图列
guide_area() & # 设置一个空区域放置收集好的图列
theme_bw()
-————————————————————————————————————————————————————————————————
## 反应交集
## 在线工具: http://www.interactivenn.net/
library(ggvenn) # ggplot 的语法 (https://github.com/yanlinlin82/ggvenn)
## 读取数据 (需要向量 / 储存向量的 list)
Orthogroups <- read_delim("/Volumes/Cullwen_SSD/Genek/5 R语言出版级绘图(2020)/plot_training_20200701/data/Orthogroups.GeneCount.tsv",
"\t", escape_double = FALSE, trim_ws = TRUE)
## 处理数据
sesame <- filter(Orthogroups, Sind > 0) %>% pull(Orthogroup)
tomato <-filter(Orthogroups, Slyc > 0) %>% pull(Orthogroup)
potato <- filter(Orthogroups, Stub > 0) %>% pull(Orthogroup)
grape <- filter(Orthogroups, Vvin > 0) %>% pull(Orthogroup)
venn <- list(sesame = sesame, tomato = tomato, potato = potato, grape = grape)
## data type: list, data.frame
ggvenn(venn, c("sesame", "tomato", "potato", "grape"),
stroke_linetype = 1, # 线条的类型
stroke_size = 1, # 线条粗细
set_name_color = "black",
set_name_size = 5,
# digits = 2, # 保留百分比后几位
show_percentage = FALSE) + # 展示百分比 (默认为 TRUE)
scale_fill_npg()
library(VennDetail) # https://github.com/guokai8/VennDetail
res_venndetail <- venndetail(venn) # list 列表
result_venndetail <- result(res_venndetail)
# view(head(result_venndetail)) # 列出重叠 / 不相交的值 / ID
# filter(result_venndetail, Subset == 'sesame_tomato_potato') # 查看 sesame_tomato_potato 相交ID的个数
### 韦恩图最大长度为 6 个向量
### usetplot - 韦恩图另一种形式
library(UpSetR)
library(ggupset)
library(ComplexUpset)
### 处理数据
Orthogroup_usetplot <- select(Orthogroups, -Total)
Orthogroup_usetplot[2:5] <- Orthogroup_usetplot[2:5] > 0
### 作图
upset(Orthogroup_usetplot,
# c(Slyc, Stub, Sind, Vvin), # 手动输入
colnames(Orthogroup_usetplot[2:5]),
height_ratio = 0.4,
width_ratio = 0.2,
min_size = 0,
name = NULL) # x 轴范围
-————————————————————————————————————————————————————————————————
# https://blog.csdn.net/woodcorpse/article/details/106553931
# 与树状图类似 (适合数据量多)
# devtools::install_github("hannet91/ggcor")
library(ggcor)
# mechanical 与 performance 相关
mtcar_mechanical <- mtcars[, c('mpg', 'qsec')]
mtcar_performance <- mtcars[, c('cyl', 'disp', 'hp', 'drat', 'wt', 'vs', 'am')]
# 相关系数矩阵图
# 表内任意两列间的相关系数
quickcor(mtcar_performance,
type = 'upper', # upper, lower, full
cor.test = T) + # 计算 P 值
geom_square() + # geom_square, geom_circle2, geom_color, geom_ellipse2
geom_mark(size = 2.5) + # 字体大小
scale_fill_gradient2(low = '#2D4971',
high = '#9F192B',
mid = 'white',
midpoint = 0) # 连续型数据图列映射
quickcor(mtcar_performance,
cor.test = T) + # 设置背景主题默认 fill
geom_square(data = get_data( # get_data 获取背景图层的数据映射
type = 'upper', # 把相关系数矩阵 (右上角)
show.diag = F)) + # 忽略对角线
geom_mark(data = get_data(
type = 'lower', # P 值 (左下角)
show.diag = F),
size = 2.5) +
geom_abline(slope = -1, # 斜率
intercept = 8, # 截距
linetype = 'dotted',
color='grey',
size = 1) + # 添加对角分割线
# solid, dashed, dotted, dotdash, longdash, twodash
scale_fill_gradient2(low = '#2D4971',
high = '#9F192B',
mid = 'white', midpoint = 0) +
theme_cowplot()
# 两表间的相关系数图
link_cor <- correlate(mtcar_mechanical, mtcar_performance, cor.test = T) %>% # 计算两表的相关性
as_cor_tbl() %>% # 把结果转换为 table 格式
select(mechanical = .row.names, performance = .col.names, r, p.value) %>% # 修改列名
mutate( # 增加列
rd = cut(r, breaks = c(-Inf, 0.2, 0.4, Inf), # 将 r 列的数据切为为多个窗口
# (-∞, 0.2), (0.2, 0.4), (0.4, +∞), 线条粗细的映射
labels = c("< 0.2", "0.2 - 0.4", ">= 0.4")), # 并给予标签, 图列
pd = cut(p.value, breaks = c(-Inf, 0.01, 0.05, Inf),
labels = c("< 0.01", "0.01 - 0.05", ">= 0.05")))
view(link_cor)
# 作图
# part01
quickcor(mtcar_performance,
type = 'upper',
cor.test = T) +
geom_square() +
geom_mark(size = 2.5) +
scale_fill_gradient2(low = '#2D4971',
high = '#9F192B',
mid = 'white', midpoint = 0) +
# part02
ggcor::anno_link(data = link_cor, aes(color = pd, size = rd)) +
scale_size_manual(values = c(0.5, 1, 2)) +
scale_colour_manual(values = c("#D95F02", "#1B9E77", "#A2A2A288"))
## Error: 'anno_link' is not an exported object from 'namespace:ggcor'
# WGCNA 应用
# 加载数据
load("/Volumes/Cullwen_SSD/Genek/5 R语言出版级绘图(2020)/plot_training_20200701/data/ggcor/wgcna.rdata")
view(net) # WGCNA 生成
# 处理数据
WGCNA_MEs <- net$MEs
view(head(WGCNA_MEs))
colnames(WGCNA_MEs) <- # 读取列名
str_remove(colnames(WGCNA_MEs), 'ME') # 赋予修改 (移除 MEs 字符)
view(head(WGCNA_MEs))
# 准备数据
link_cor_WGCNA <-
correlate(datTraits[,1:3], # 选取 WGCNA结果 (datTraits) 的 1-3 列数据
WGCNA_MEs,
cor.test = T,
use = "p") %>% # 跳过空值
as_cor_tbl() %>%
select(Traits = .row.names,
Modules = .col.names,
r, p.value) %>%
mutate(rd = cut(r, breaks = c(-Inf, 0.2, 0.4, Inf),
labels = c("< 0.2", "0.2 - 0.4", ">= 0.4")),
pd = cut(p.value, breaks = c(-Inf, 0.01, 0.05, Inf),
labels = c("< 0.01", "0.01 - 0.05", ">= 0.05")))
# 作图
# part01
quickcor(WGCNA_MEs,
type = 'upper',
cor.test = T,
show.diag = F) +
geom_square() +
scale_fill_gradient2(low = '#2D4971',
high = '#9F192B',
mid = 'white', midpoint = 0) +
# part02
ggcor::anno_link(data = link_cor_WGCNA, aes(color = pd, size = rd)) +
scale_size_manual(values = c(0.5, 1, 1.5)) +
scale_colour_manual(values = c("#D95F02", "#1B9E77", "#A2A2A288")) +
# + AI 调整图列位置
add_link(link_cor_WGCNA, mapping = aes(colour = p.value, size = r),
diag.label = TRUE) +
scale_size_manual(values = c(0.5, 1.5, 3)) +
geom_diag_label() + remove_axis("x")
## Error: 'anno_link' is not an exported object from 'namespace:ggcor'
-————————————————————————————————————————————————————————————————
## 相当于矩阵中的数值用颜色的深浅表示
## cluster 聚类 (表达模式相似 - 随着细胞周期变化 A基因上调, B基因下调...)
library(pheatmap)
### 导入数据
load(file = '/Volumes/Cullwen_SSD/Genek/5 R语言出版级绘图(2020)/plot_training_20200701/data/geo-cesc/prepare.rdata')
load(file = '/Volumes/Cullwen_SSD/Genek/5 R语言出版级绘图(2020)/plot_training_20200701/data/geo-cesc/de.rdata')
sample_info_pheatmap <- sample_info
de_result_pheatmap <- de_result
### 处理数据
#### 提取 cancer 和 normal 样本编号
cancer_normal_samples <-
rownames_to_column(sample_info, var = 'sample_id') %>%
filter(group == 'Cancer' | group == 'Normal') %>% # '==' 判断
pull(sample_id)
#### 提取 cancer 和 normal 样本信息表
cancer_normal_samples_info <-
sample_info[cancer_normal_samples, ] # cancer_normal_samples 存有目标样品的编号
# 匹配 cancer_normal_samples 里编号, 提取对应行列
#### 提取前 20 个差异最大基因的表达矩阵
top20_de_pheatmap <- select(de_result_pheatmap, Gene_Symbol, one_of(cancer_normal_samples)) %>%
filter(!is.na(Gene_Symbol)) %>%
distinct(Gene_Symbol, .keep_all = T) %>%
dplyr::slice(1:20) %>%
column_to_rownames(var = 'Gene_Symbol')
#### 作图
view(top20_de_pheatmap) # 行 (样品), 列 (基因)
pheatmap(top20_de_pheatmap,
show_colnames = F, # 样品太多, 去除样品名称
cellwidth = 6,
cellheight = 6, # cell (格子), 长等宽 (正方形)
# color = colorRampPalette(c("green","white","red"))(100),
# 设置格子颜色 (过渡区间-越高越流畅) (推荐默认)
# display_numbers = T, # 格子中展示数据
fontsize = 8, # fontsize_row (col) 对行/列 字体大小单独设置
cutree_cols = 3, # 对列进行切分
annotation_col = dplyr::select( # 对切分的列注释
cancer_normal_samples_info, # 输入为 dataframe
group), # 提取样品信息表的 group 列
annotation_colors = list(
group = c('Cancer' = '#fc8d59',
'Normal' = '#99d594')), # 设置列注释颜色
)
abnormal_df <- top20_de_pheatmap
abnormal_df[1,1] = 50
view(abnormal_df)
# 默认
pheatmap(abnormal_df,
show_colnames = F,
cellwidth = 8,
cellheight = 8,
fontsize = 8,
cutree_cols = 3,
annotation_col = dplyr::select(
cancer_normal_samples_info,
group),
annotation_colors = list(
group = c('Cancer' = '#fc8d59',
'Normal' = '#99d594')))
## Method 01 (设置颜色跨度 - 次次之)
pheatmap(abnormal_df,
show_colnames = F,
cellwidth = 6,
cellheight = 6,
fontsize = 6,
cutree_cols = 2,
annotation_col = dplyr::select(
cancer_normal_samples_info,
group),
annotation_colors = list(
group = c('Cancer' = '#fc8d59',
'Normal' = '#48b9c4')),
# 通过减少格子颜色映射的跨度
color = colorRampPalette(c("#99d594","#fdfed8","#a9393d"))(18),
breaks = seq(0, 18, 1), # 最高到最低由 18 个颜色区分
legend_breaks = seq(0, 18, 1),
legend_labels = seq(0, 18, 1)
)
## Method 02 (取对数 - 次之)
pheatmap(log10(abnormal_df + 1),
# 取对数
show_colnames = F,
cellwidth = 6,
cellheight = 6,
fontsize = 6,
cutree_cols = 2,
annotation_col = dplyr::select(
cancer_normal_samples_info,
group),
annotation_colors = list(
group = c('Cancer' = '#fc8d59',
'Normal' = '#99d594'))
)
# Method 03 (标准化 - 优先)
pheatmap(abnormal_df,
scale = 'row', # row 描述基因表达量的差异 (标准化)
show_colnames = F,
cellwidth = 6,
cellheight = 6,
fontsize = 6,
cutree_cols = 2,
annotation_col = dplyr::select(
cancer_normal_samples_info,
group),
annotation_colors = list(
group = c('Cancer' = '#fc8d59',
'Normal' = '#99d594'))
)
library(ComplexHeatmap)
library(circlize)
# 默认
Heatmap(top20_de_pheatmap,
# show_column_dend = F, # 展示列的聚类
show_column_names = F, # 列名 (样品名)
# 图例加名字
name = "Expression",
# 加表标题
column_title = "Gene Expression Heatmap",
column_title_side = "top" # 'bottom - 底部'
)
# 添加注释
column_ha <- HeatmapAnnotation(
group = anno_simple(cancer_normal_samples_info$group), # 癌症/正常 (简单-色块)
test1 = anno_points(cancer_normal_samples_info$test1), # 身高 (点)
test2 = anno_lines(cancer_normal_samples_info$test2), # 体重 (折线)
test3 = anno_barplot(cancer_normal_samples_info$test3), # 年龄 (直方图)
col = list(group = c(Cancer = '#e065af', Normal = '#fee0d2'))) # 离散型变量
Heatmap(top20_de_pheatmap,
# show_column_dend = F, # 展示列的聚类
show_column_names = F, # 列名 (样品名)
# 图例加名字
name = "Expression",
# 加表标题
column_title = "Gene Expression Heatmap",
column_title_side = "top", # 'bottom - 底部'
col = colorRamp2(breaks = c(0, 10, 20),
colors = c("#6cc08b","white","#ef6a4c")),
# 通过 circlize 包创建一个 Heatmap 的颜色刻度
# border = 'black', # 外边框
rect_gp = gpar(col = '#737373', # 内边框颜色和粗细
lwd = 1.5),
row_names_gp = gpar(fontsize = 8),
column_title_gp = gpar(fontface = 'bold'), # 标题加粗
# column_km = 3, # 对列聚类进行分割
column_split = cancer_normal_samples_info$group, # 按照 group 分割 (cancer/normal)
bottom_annotation = column_ha, # class(column_ha) - funtion (注释信息)
column_gap = unit(0.015, 'npc') # 设置分割间隙距离 (npc - 相对主图的大小)
)
# 读取数据
mat_expr <- read.csv(file = '/Volumes/Cullwen_SSD/Genek/5 R语言出版级绘图(2020)/plot_training_20200701/data/ComplexHeatmap/example3/mat_expr.csv',
row.names = 1)
mat_meth <- read.csv(file = '/Volumes/Cullwen_SSD/Genek/5 R语言出版级绘图(2020)/plot_training_20200701/data/ComplexHeatmap/example3/mat_meth.csv',
row.names = 1)
sample_info_Heatmap <- read.csv(file = '/Volumes/Cullwen_SSD/Genek/5 R语言出版级绘图(2020)/plot_training_20200701/data/ComplexHeatmap/example3/sample_info.csv',
row.names = 1)
gene_info_Heatmap <- read.csv(file = '/Volumes/Cullwen_SSD/Genek/5 R语言出版级绘图(2020)/plot_training_20200701/data/ComplexHeatmap/example3/gene_info.csv',
row.names = 1)
# Heatmap01 <-
# 关于样品的排序, 第一个热图聚类后顺序已决定, 后面的热图依赖首个热图
# No.1 Heatmap
Heatmap(mat_meth,
name = 'Methylation', # 图列名称
col = colorRamp2(breaks = c(0, 0.5, 1),
colors = c("blue", "white", "red")), # cell 颜色刻度和映射
show_row_names = F,
cluster_columns = F,
column_title = 'Methylation', # 列的标题
column_title_gp = gpar(fontface = 'bold'),
top_annotation = HeatmapAnnotation( # 注释的设置
type = sample_info_Heatmap$type,
col = list(type = c("Tumor" = "pink",
"Control" = "royalblue")), # 离散型变量
annotation_name_side = 'left'), # 位置放在坐标
row_km = 2, # 对行聚类分割 (2类)
row_split = gene_info_Heatmap$meth_direction # 根据 gene_info_Heatmap$meth_direction 分割
) +
# No.2 Heatmap
Heatmap(gene_info_Heatmap$meth_direction,
name = 'meth direction',
col = c('hypo' = 'blue', 'hyper' = 'red')) +
# No.3 Heatmap
Heatmap(mat_expr,
name = 'Expression',
col = colorRamp2(breaks = c(-2, 0, 2),
colors = c("green", "white", "red")),
show_row_names = F,
cluster_columns = F,
column_title = 'Expression',
column_title_gp = gpar(fontface = 'bold'),
top_annotation = HeatmapAnnotation(
type = sample_info_Heatmap$type,
col = list(type = c("Tumor" = "pink",
"Control" = "royalblue")),
show_annotation_name = F)) +
# No.4 Heatmap
Heatmap(gene_info_Heatmap$cor_pvalue, # 相关性的可靠程度
name = '-log10(cor_p)',
col = colorRamp2(breaks = c(0, 2, 4), # 连续型变量
colors = c("white", "white", "red"))) +
# No.5 Heatmap
Heatmap(gene_info_Heatmap$gene_type,
name = 'gene_type',
col = c('protein_coding' = '#8DD3C7',
'psedo-gene' = '#FEFBB3',
'lincRNA' = '#BEBADA',
'others'= '#F07F70',
'microRNA' = '#80B1D4')) +
# No.6 Heatmap
Heatmap(gene_info_Heatmap$gene_anno,
name = 'gene_anno') +
# No.7 Heatmap
rowAnnotation(dist_tss =
anno_barplot(gene_info_Heatmap$dist)) # 直方图的形式
Heatmap01 <- draw(Heatmap01, heatmap_legend_side = 'bottom') # 将图列放在底部
## Error in h(simpleError(msg, call)): error in evaluating the argument 'object' in selecting a method for function 'draw': object 'Heatmap01' not found
top100_de_Heatmap <- select(de_result_pheatmap, Gene_Symbol, one_of(cancer_normal_samples)) %>%
filter(!is.na(Gene_Symbol)) %>%
distinct(Gene_Symbol, .keep_all = T) %>%
dplyr::slice(1:100) %>%
column_to_rownames(var = 'Gene_Symbol')
# 行注释标注关键基因
## 生成数据
set.seed(123) # 随机种子
key_genes_Heatmap02 <- sample(rownames(top100_de_Heatmap),
size = 20,
replace = F)
## 判断 key_genes_Heatmap02 位于 top100_de_Heatmap 里的位置
which(rownames(top100_de_Heatmap) %in% key_genes_Heatmap02)
## [1] 7 9 14 25 26 31 36 42 43 50 51 57 67 69 72 79 87 90 95 97
## 构建注释信息 funtion
row_ha <-
rowAnnotation(
key_genes_Heatmap02 =
anno_mark(at = which(rownames(top100_de_Heatmap) %in% key_genes_Heatmap02),
labels = key_genes_Heatmap02,
labels_gp = gpar(fontsize = 8)
)
)
Heatmap(top100_de_Heatmap,
show_row_names = F,
show_column_names = F,
name = 'Expressio',
column_title = 'Gene Expression',
column_title_gp = gpar(fontface = 'bold'),
col = colorRamp2(breaks = c(0, 10, 20),
colors = c("green", "white", "red")),
rect_gp = gpar(col = 'white', # 添加内框
lwd = 1),
column_km = 3,
column_gap = unit(0.015, 'npc'),
right_annotation = row_ha
)
column_ha <- HeatmapAnnotation(
group = anno_simple(cancer_normal_samples_info$group),
height = unit(15, 'mm'), # 癌症/正常 (简单-色块)
test1 = anno_points(cancer_normal_samples_info$test1, # 身高 (点)
axis = F, # 不显示标签
size = unit(1.5, 'mm'), # 设置点的大小
height = unit(5, 'mm')), # 设置高度
test2 = anno_lines(cancer_normal_samples_info$test2, # 体重 (折线)
axis_param = list(at = c(0, 50, 100),
labels = c('', #
'50', '100')),
# 0 刻度对应的标签设置为空字符串, 避免与下一个图的 100 刻度标签重叠
height = unit(5, 'mm')),
test3 = anno_barplot(cancer_normal_samples_info$test3, # 年龄 (直方图)
height = unit(5, 'mm')),
col = list(group = c('Cancer' = '#e065af', 'Normal' = '#fee0d2')), # 离散型变量
annotation_name_gp = gpar(fontsize = 9, # 修改注释字体大小
fontface = 'italic') # 斜体
)
Heatmap(top100_de_Heatmap,
show_row_names = F,
show_column_names = F,
name = 'Expressio',
column_title = 'Gene Expression',
column_title_gp = gpar(fontface = 'bold'),
col = colorRamp2(breaks = c(0, 10, 20),
colors = c("green", "white", "red")),
rect_gp = gpar(col = 'white', # 添加内框
lwd = 1),
column_km = 3,
column_gap = unit(0.015, 'npc'),
right_annotation = row_ha,
bottom_annotation = column_ha
)